Observed log-chlorophyll at representative station for the St. Lucie Estuary
library(tidyverse)
library(lubridate)
library(mgcv)
library(plotly)
library(WRTDStidal)
library(gridExtra)
source('R/funcs.R')
# flow data, left moving window average of 30 days
data(sl_fldat)
fl_dat <- sl_fldat %>%
rename(date = Date) %>%
mutate(
qsm = stats::filter(q, rep(1, 30)/30, sides = 1, method = 'convolution')
)
# format the data to model
data(sl_dat)
sl_mod <- sl_dat %>%
rename(date = Date) %>%
mutate(
doy = yday(date),
dec_time = decimal_date(date),
yr = year(date),
yr = factor(yr),
mo = month(date, label = T)
) %>%
left_join(fl_dat, by = 'date') %>%
mutate(
flo = log(qsm),
lnchl = log(1 + chl)
) %>%
select(-q, -qsm, -sal)
# plot, all
p <- ggplot(sl_mod, aes(x = date, y = lnchl)) +
geom_line() +
theme_bw()
ggplotly(p)
# boxplot, by years
p <- ggplot(sl_mod, aes(x = yr, y = lnchl)) +
geom_boxplot() +
theme_bw()
ggplotly(p)
# boxplot, by month
p <- ggplot(sl_mod, aes(x = mo, y = lnchl)) +
geom_boxplot() +
theme_bw()
ggplotly(p)
Some simple GAMs to explore annual, seasonal trends.
# smooths to evaluate
smths <- c(
"s(dec_time, bs = 'tp')",
"s(doy, bs = 'cc')",
"te(dec_time, doy, bs = c('tp', 'cc'))"
)
# get all combinations of smoothers to model, one to many
frms <- list()
for(i in seq_along(smths)){
frm <- combn(smths, i) %>%
apply(2, function(x){
paste(x, collapse = ' + ') %>%
paste('lnchl ~ ', .) %>%
formula
})
frms <- c(frms, frm)
}
# create models from smooth formula combinations
mods <- map(frms, function(frm){
gam(frm,
knots = list(doy = c(1, 366)),
data = sl_mod,
na.action = na.exclude
)
})
names(mods) <- paste0('mod', seq_along(mods))
Summary stats of annual, seasonal models:
# smoother stats of GAMs
map(mods, ~ summary(.x)$s.table %>% data.frame %>% rownames_to_column('smoother')) %>%
enframe %>%
unnest %>%
kable
| name | smoother | edf | Ref.df | F | p.value |
|---|---|---|---|---|---|
| mod1 | s(dec_time) | 1.491837 | 1.833950 | 8.0481357 | 0.0017935 |
| mod2 | s(doy) | 4.209953 | 8.000000 | 6.5199494 | 0.0000000 |
| mod3 | te(dec_time,doy) | 8.433081 | 11.101531 | 5.1741398 | 0.0000001 |
| mod4 | s(dec_time) | 1.882782 | 2.353842 | 6.0328718 | 0.0014303 |
| mod4 | s(doy) | 4.222344 | 8.000000 | 6.6335533 | 0.0000000 |
| mod5 | s(dec_time) | 1.000000 | 1.000000 | 5.6591977 | 0.0179168 |
| mod5 | te(dec_time,doy) | 9.559921 | 15.000000 | 3.3266003 | 0.0000000 |
| mod6 | s(doy) | 4.206482 | 8.000000 | 5.8392276 | 0.0000000 |
| mod6 | te(dec_time,doy) | 2.741101 | 3.810399 | 3.9993657 | 0.0043228 |
| mod7 | s(dec_time) | 1.000000 | 1.000000 | 10.6360338 | 0.0012203 |
| mod7 | s(doy) | 4.164553 | 8.000000 | 5.1577131 | 0.0000000 |
| mod7 | te(dec_time,doy) | 2.852311 | 15.000000 | 0.2807338 | 0.1611397 |
# summary stats of GAMs
map(mods, ~ data.frame(
AIC = AIC(.x),
R2 = summary(.x)$r.sq)) %>%
enframe %>%
unnest %>%
kable
| name | AIC | R2 |
|---|---|---|
| mod1 | 654.5720 | 0.0350432 |
| mod2 | 621.1283 | 0.1312536 |
| mod3 | 619.4651 | 0.1457643 |
| mod4 | 609.3679 | 0.1649803 |
| mod5 | 619.0978 | 0.1517706 |
| mod6 | 609.1895 | 0.1674090 |
| mod7 | 608.6850 | 0.1711443 |
# prediction data
pred_dat <- crossing(
yr = seq(floor(min(sl_mod$dec_time)), ceiling(max(sl_mod$dec_time))),
doy = seq(1, 365, by = 5)
) %>%
unite('date', yr, doy, sep = '-', remove = F) %>%
mutate(
date = as.Date(date, format = '%Y-%j'),
dec_time = decimal_date(date),
mo = month(date, label = TRUE),
yr = year(date)
) %>%
left_join(., fl_dat[, c('date', 'qsm')]) %>%
mutate(flo = log(qsm)) %>%
select(-qsm)
# predictions
sl_res <- map(mods, function(x){
pred_dat %>%
mutate(
pred = predict(x, newdata = pred_dat)
)
}) %>%
enframe('mods') %>%
unnest
# plot
p <- ggplot(sl_res, aes(x = date)) +
geom_point(data = sl_mod, aes(y = lnchl), size = 0.5) +
geom_line(aes(y = pred, colour = mods)) +
theme_bw() +
theme(
legend.position = 'top',
legend.title = element_blank()
)
ggplotly(p)
# plot
p <- ggplot(sl_res, aes(x = doy, group = factor(yr), colour = yr)) +
geom_line(aes(y = pred)) +
theme_bw() +
theme(
legend.position = 'top',
legend.title = element_blank()
) +
facet_wrap(~ mods, ncol = 2)
ggplotly(p)
Adding flow data to the model:
# smooths to evaluate
smths <- c(
"s(dec_time, bs = 'tp')",
"s(doy, bs = 'cc')",
"s(flo, bs = 'tp')",
"te(flo, doy, bs = c('tp', 'cc'))",
"te(flo, dec_time, bs = c('tp', 'tp'))",
"te(dec_time, doy, bs = c('tp', 'cc'))",
"te(dec_time, doy, flo, bs = c('tp', 'cc', 'tp'))"
)
# get all combinations of smoothers to model, one to many
frms <- list()
for(i in seq_along(smths)){
frm <- combn(smths, i) %>%
apply(2, function(x){
paste(x, collapse = ' + ') %>%
paste('lnchl ~ ', .) %>%
formula
})
frms <- c(frms, frm)
}
# create models from smooth formula combinations
mods2 <- map(frms, function(frm){
gam(frm,
knots = list(doy = c(1, 366)),
data = sl_mod,
na.action = na.exclude
)
})
names(mods2) <- paste0('mod', seq_along(mods2))
Summary stats of best year/season model, year/season/flow model
# best model with only season, year
best1 <- map(mods, AIC) %>%
unlist %>%
which.min %>%
mods[[.]]
# best model with season, year, flow
best2 <- map(mods2, AIC) %>%
unlist %>%
which.min %>%
mods2[[.]]
best <- list(best1 = best1, best2 = best2)
# smoother stats of GAMs
map(best, ~ summary(.x)$s.table %>% data.frame %>% rownames_to_column('smoother')) %>%
enframe %>%
unnest %>%
kable
| name | smoother | edf | Ref.df | F | p.value |
|---|---|---|---|---|---|
| best1 | s(dec_time) | 1.0000001 | 1.000000 | 10.6360338 | 0.0012203 |
| best1 | s(doy) | 4.1645525 | 8.000000 | 5.1577131 | 0.0000000 |
| best1 | te(dec_time,doy) | 2.8523113 | 15.000000 | 0.2807338 | 0.1611397 |
| best2 | s(dec_time) | 1.0000008 | 1.000001 | 1.9293923 | 0.1658109 |
| best2 | s(doy) | 2.9402431 | 8.000000 | 0.7092063 | 0.0456631 |
| best2 | s(flo) | 6.7839630 | 7.863753 | 2.9627043 | 0.0049629 |
| best2 | te(flo,doy) | 2.9616367 | 12.000000 | 0.5845557 | 0.0124682 |
| best2 | te(flo,dec_time) | 0.2050133 | 15.000000 | 0.0193067 | 0.0936091 |
| best2 | te(dec_time,doy) | 0.0000023 | 15.000000 | 0.0000001 | 0.8125709 |
| best2 | te(dec_time,doy,flo) | 19.8642287 | 48.000000 | 1.0091690 | 0.0000325 |
# summary stats of GAMs
map(best, ~ data.frame(
AIC = AIC(.x),
R2 = summary(.x)$r.sq)) %>%
enframe %>%
unnest %>%
kable
| name | AIC | R2 |
|---|---|---|
| best1 | 608.6850 | 0.1711443 |
| best2 | 556.8277 | 0.3351218 |
# predictions
sl_res2 <- map(best, function(x){
pred_dat %>%
mutate(
pred = predict(x, newdata = pred_dat)
)
}) %>%
enframe('mods') %>%
unnest
# plot
p <- ggplot(sl_res2, aes(x = date)) +
geom_point(data = sl_mod, aes(y = lnchl), size = 0.5) +
geom_line(aes(y = pred, colour = mods)) +
theme_bw() +
theme(
legend.position = 'top',
legend.title = element_blank()
)
ggplotly(p)
ptheme <- theme(
axis.title.x = element_blank(),
axis.title.y = element_blank()
)
cols <- 'Spectral'
pb1 <- dynagam(best1, pred_dat, ncol = 1, col_vec = cols) +
ptheme +
theme(legend.position = 'none') +
ggtitle('Best 1')
pb2 <- dynagam(best2, pred_dat, ncol = 1, col_vec = cols) +
ptheme +
ggtitle('Best2')
pleg <- g_legend(pb2)
pb2 <- pb2 +
theme(legend.position = 'none')
grid.arrange(
pleg,
arrangeGrob(pb1, pb2, ncol = 2, bottom = 'lnQ', left = 'lnchl'),
ncol = 1,
heights = c(0.1, 1)
)